#!/bin/bash
# Two-stage multimodal training driver:
#   stage 1 — pretrain the mm projector (adapter only),
#   stage 2 — LoRA instruction fine-tune with the vision tower unfrozen.
#
# Fix: the shebang must be the very first line of the file — in the original,
# `export NPU_ENABLE=true` preceded it, which made the shebang a no-op comment.
set -euo pipefail   # abort on first failure so stage 2 never runs on a broken stage-1 checkpoint

export NPU_ENABLE=true   # presumably switches the training stack to the NPU backend — TODO confirm

MODEL_VERSION=qwen2-5-7b-chat          # base LLM checkpoint dir name under ./checkpoints/
PROMPT_VERSION=qwen                    # conversation/prompt template id
VE_CKPT=checkpoints/aimv2-3b-p14-448   # vision encoder checkpoint path

IMG_SIZE=504                           # image resolution passed as --new_img_size
PROJ_TYPE=pixelshuffle_2x              # mm projector type (trailing space in original removed)
VE=aimv2-3b                            # short vision-encoder tag used in output dir names

# aimv2-3b with 448px output gives ~1024 image tokens — is that budget reasonable?

# ./data/sft/icdar2019_lsvt.json \                
# ./data/sft/mtwi_ocr_20k.json \
# ./data/sft/autoposter_76k.json \


# ---------------------------------------------------------------------------
# Stage 1: mm-projector pretraining.
# Only the MLP adapter is trained (--tune_mm_mlp_adapter True); the LLM and
# vision tower stay frozen. Data mix is caption + OCR heavy. The resulting
# mm_projector.bin under the output dir is loaded by stage 2 below via
# --pretrain_mm_mlp_adapter.
# NOTE(review): this stage uses zero2.json while stage 2 uses zero2_npu.json —
# confirm the non-NPU config here is intentional given NPU_ENABLE=true.
# NOTE(review): bf16 True with tf32 False here vs tf32 True in stage 2 —
# verify which is correct for the target hardware.
# ---------------------------------------------------------------------------
deepspeed train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --model_name_or_path ./checkpoints/$MODEL_VERSION \
    --version $PROMPT_VERSION \
    --data_path ./data/sft/minigemini_pretrain.json \
                ./data/sft/share-captioner_coco_lcs_sam_1246k_1107.json \
                ./data/sft/llava_recap_558k.json \
                ./data/sft/textmonkey_pretrain.json \
                ./data/sft/wanjuan_ocr_b3.json \
                ./data/sft/inhouse_crop_b1.json \
                ./data/sft/mtwi_ocr_20k_ocr.json \
                ./data/sft/icdar2019_lsvt_50k_ocr.json \
                ./data/sft/icdar_mlt_ocr.json \
                ./data/sft/autoposter_76k.json \
                ./data/sft/curate_ocr_b2.json \
                ./data/sft/llavar_pretrain.json \
    --image_folder ./data/images_all \
    --vision_tower $VE_CKPT \
    --mm_projector_type $PROJ_TYPE \
    --new_img_size $IMG_SIZE \
    --tune_mm_mlp_adapter True \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --bf16 True \
    --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain-$VE-$PROJ_TYPE-$IMG_SIZE \
    --num_train_epochs 1 \
    --per_device_train_batch_size 10 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 2 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 4000 \
    --save_total_limit 1 \
    --learning_rate 1e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --fp16 False \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True


# Disabled "chat"-variant dataset candidates (kept for reference):
# ./data/sft/icdar_mlt_chat.json \
# ./data/sft/autoposter_76k_chat.json \
# ./data/sft/icdar2019_lsvt_chat.json \

#                 ./data/sft/inhouse_crop_b1.json \
# ./data/sft/c7s-code-feedback-66k.json \
#                 ./data/sft/c7s-scienceqa-12k.json \
#                 ./data/sft/c7s-screenqa-79k.json \

#  ./data/sft/figureqa-100k.json \
#                 ./data/sft/symbolic_tabmw_32k.json \
#                 ./data/sft/plotqa-157k.json \

# ---------------------------------------------------------------------------
# Stage 2: instruction fine-tuning with LoRA on the LLM and the vision tower
# unfrozen (--unfreeze_mm_vision_tower True, separate --mm_vision_tower_lr).
# Loads the projector weights produced by stage 1 from
#   ./checkpoints/llava-$MODEL_VERSION-pretrain-$VE-$PROJ_TYPE-$IMG_SIZE/mm_projector.bin
# so stage 1 must have completed successfully before this runs.
# NOTE(review): lora_alpha (256) < lora_r (512) gives a 0.5x LoRA scaling
# factor — confirm this is intentional (alpha >= r is the more common setup).
# NOTE(review): output dir ends in "-vit-lora-test" — rename before any
# production run.
# ---------------------------------------------------------------------------
deepspeed train_mem.py \
    --deepspeed ./scripts/zero2_npu.json \
    --lora_enable True --lora_r 512 --lora_alpha 256 --lora_dropout 0.05 --mm_projector_lr 1e-5 \
    --mm_vision_tower_lr 0.2e-5 \
    --model_name_or_path ./checkpoints/$MODEL_VERSION \
    --version $PROMPT_VERSION \
    --data_path ./data/sft/minigemini_instruction.json \
                ./data/sft/bunny_695k.json \
                ./data/sft/qa_cmmmu.json \
                ./data/sft/qa_mmbench.json \
                ./data/sft/multi_spot_diff.json \
                ./data/sft/multi_lrv_multi.json \
                ./data/sft/doc_reasoning.json \
                ./data/sft/autoposter_76k_chat.json \
                ./data/sft/icdar2019_lsvt_chat.json \
    --image_folder ./data/images_all \
    --vision_tower $VE_CKPT \
    --unfreeze_mm_vision_tower True \
    --new_img_size $IMG_SIZE \
    --mm_projector_type $PROJ_TYPE \
    --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain-$VE-$PROJ_TYPE-$IMG_SIZE/mm_projector.bin \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --tf32 True \
    --output_dir ./checkpoints/llava-$MODEL_VERSION-$VE-$PROJ_TYPE-vit-lora-test \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 32 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 1000 \
    --save_total_limit 1 \
    --learning_rate 4e-5 \
    --weight_decay 0. \
    --warmup_steps 360 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True 
